﻿Imports System.Net
Imports System.IO
Imports System.Text.RegularExpressions

Public Class hujiangEnglish
    ''' <summary>
    ''' Raised by the confirm button with a type code (0 in this form) and the
    ''' URL of the selected grid row.
    ''' </summary>
    Event 选择(ByVal 类型 As Integer, ByVal 网址 As String)

    ' Root of the site that list-page links are relative to.
    Private Const SiteRoot As String = "http://voa.hjenglish.com"

    ''' <summary>
    ''' Starts a background scrape. All UI state is read here, on the UI thread,
    ''' and handed to the worker as its argument so the worker never touches controls.
    ''' </summary>
    Private Sub btn采集_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btn采集.Click
        ' RunWorkerAsync throws if a run is already in progress.
        If Me.BackgroundWorker1.IsBusy Then
            Return
        End If

        ' Only the first tab has a scraper behind it.
        If Me.tabVOA.SelectedIndex <> 0 Then
            Return
        End If

        Dim pageCount As Integer
        Try
            ' Same String -> Integer conversion the original performed implicitly.
            pageCount = CInt(Me.nm页数.Text)
        Catch ex As InvalidCastException
            ' Page-count text is not numeric: nothing sensible to scrape.
            Return
        Catch ex As OverflowException
            Return
        End Try

        ' pid 2 = slow-speed VOA, pid 1 = standard VOA.
        Dim pid As Integer = If(Me.rad沪江慢速VOA.Checked, 2, 1)

        Me.btn采集.Enabled = False
        Me.btn确定.Enabled = False
        Me.BackgroundWorker1.RunWorkerAsync(New Integer() {pageCount, pid})
    End Sub

    ''' <summary>
    ''' Downloads the first <paramref name="pageCount"/> list pages and returns one
    ''' row per article found, sorted by date descending.
    ''' </summary>
    ''' <param name="pageCount">Number of list pages to fetch, starting at page 1.</param>
    ''' <param name="pid">1 for standard VOA, 2 for slow-speed VOA.</param>
    ''' <returns>
    ''' A table with columns 日期, 标题, 网址, 是否采集过 (Boolean), 能否采集 (Boolean) and 打开.
    ''' </returns>
    ''' <remarks>
    ''' Runs on the background worker thread and reports progress through
    ''' BackgroundWorker1; it must not touch any control directly.
    ''' </remarks>
    Private Function 沪江采集(ByVal pageCount As Integer, ByVal pid As Integer) As DataTable
        Dim dt As New DataTable
        dt.Columns.Add("日期")
        dt.Columns.Add("标题")
        dt.Columns.Add("网址")
        Dim collectedColumn As DataColumn = dt.Columns.Add("是否采集过", GetType(Boolean))
        collectedColumn.DefaultValue = False
        dt.Columns.Add("能否采集", GetType(Boolean))
        dt.Columns.Add("打开")

        ' Captures the date text, the relative link and the title of each list entry.
        Dim strPattern As String = "<span class=""voa_list_title"">(?<riqi>[\s\S]+?)<a href=""(?<url>[\s\S]+?)"" target=""_blank"" title=""(?<content>[\s\S]+?)""[\S\s]+?<span class=""gray"">"

        Dim progress As Integer = 0

        For page As Integer = 1 To pageCount
            ' 采集 returns "" on failure, which simply yields no matches for this page.
            Dim strContent As String = 采集(SiteRoot & "/list.aspx?page=" & page & "&pid=" & pid)
            Dim mcs As MatchCollection = Regex.Matches(strContent, strPattern)

            progress += 20
            Me.BackgroundWorker1.ReportProgress(progress)

            If mcs.Count > 0 Then
                For Each mc As Match In mcs
                    Dim dateText As String = mc.Groups("riqi").Value.Replace("&nbsp;", "").Trim
                    Dim articleUrl As String = SiteRoot & mc.Groups("url").Value.Trim
                    ' Second URL form checked against the table, built from the date with dashes removed.
                    Dim datedUrl As String = SiteRoot & "/sp/view/" & dateText.Replace("-", "")

                    Dim dr As DataRow = dt.NewRow
                    dr("日期") = dateText
                    dr("标题") = mc.Groups("content").Value.Trim
                    dr("网址") = articleUrl
                    dr("是否采集过") = IsAlreadyCollected(articleUrl) OrElse IsAlreadyCollected(datedUrl)
                    ' Entries whose markup contains the "new" icon are flagged as not collectable.
                    dr("能否采集") = Not mc.Groups(0).Value.Contains("images/icon_new.gif")
                    dr("打开") = "打开"
                    dt.Rows.Add(dr)
                Next

                progress += 5
                Me.BackgroundWorker1.ReportProgress(progress)
            End If
        Next

        progress += 15
        Me.BackgroundWorker1.ReportProgress(progress)

        ' Sort by date, newest first (string comparison: the column is untyped).
        Dim dv As DataView = dt.DefaultView
        dv.Sort = "日期 desc"
        Return dv.ToTable
    End Function

    ''' <summary>
    ''' Returns True when the XSCCODE table already has a row whose url equals <paramref name="url"/>.
    ''' </summary>
    Private Function IsAlreadyCollected(ByVal url As String) As Boolean
        ' The URL comes from scraped HTML, so double any single quote before embedding
        ' it in the SQL literal. NOTE(review): assumes the backing database uses the
        ' standard '' escape; switch to a parameterized query if ct offers one.
        Dim rs As ADODB.Recordset = ct.GetTable("select * from XSCCODE where url='" & url.Replace("'", "''") & "'")
        Try
            Return Not rs.EOF
        Finally
            ' Always release the recordset, including when reading EOF fails.
            rs.Close()
        End Try
    End Function

    ''' <summary>
    ''' Confirms the selected row: rejects rows that cannot be collected or were
    ''' already collected, otherwise raises 选择 with the row's URL.
    ''' </summary>
    Private Sub btn确定_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btn确定.Click
        If Me.tabVOA.SelectedIndex <> 0 Then
            Return
        End If
        If Me.dg沪江慢速VOA.SelectedRows.Count = 0 Then
            Return
        End If

        Dim selected As DataGridViewRow = Me.dg沪江慢速VOA.SelectedRows(0)

        If Not CBool(selected.Cells("能否采集").Value) Then
            ShowMessageBox("此行不能采集")
            Return
        End If

        ' NOTE(review): the data column is named 是否采集过 while this grid column is
        ' addressed as 是否采集; grid columns are designer-defined, so confirm the name there.
        If CBool(selected.Cells("是否采集").Value) Then
            ShowMessageBox("此行采集过")
            Return
        End If

        RaiseEvent 选择(0, selected.Cells("网址").Value.ToString)
    End Sub

    ''' <summary>
    ''' Downloads <paramref name="url"/> and returns the response body as text.
    ''' </summary>
    ''' <returns>The body, or an empty string if the request fails for any reason.</returns>
    Private Function 采集(ByVal url As String) As String
        Try
            Dim request As HttpWebRequest = CType(WebRequest.Create(url), HttpWebRequest)
            ' Using blocks release the connection and stream even if reading throws.
            Using response As WebResponse = request.GetResponse()
                Using reader As New StreamReader(response.GetResponseStream(), True)
                    Return reader.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            ' Best effort by design: callers treat "" as "no entries on this page".
            ' Leave a trace so failures are at least visible while debugging.
            System.Diagnostics.Debug.WriteLine("采集 failed for " & url & ": " & ex.Message)
            Return ""
        End Try
    End Function

    ''' <summary>
    ''' Worker-thread entry point. Expects e.Argument to be an Integer array
    ''' {pageCount, pid} and returns the scraped table through e.Result.
    ''' </summary>
    Private Sub BackgroundWorker1_DoWork(ByVal sender As System.Object, ByVal e As System.ComponentModel.DoWorkEventArgs) Handles BackgroundWorker1.DoWork
        Dim args As Integer() = DirectCast(e.Argument, Integer())
        e.Result = 沪江采集(args(0), args(1))
    End Sub

    ''' <summary>
    ''' Mirrors worker progress onto the progress bar.
    ''' </summary>
    Private Sub BackgroundWorker1_ProgressChanged(ByVal sender As System.Object, ByVal e As System.ComponentModel.ProgressChangedEventArgs) Handles BackgroundWorker1.ProgressChanged
        ' The worker adds up to 25 per page, so the raw value can pass the bar's
        ' maximum; clamp it because ProgressBar.Value rejects out-of-range values.
        Dim bounded As Integer = System.Math.Min(Me.ProgressBar1.Maximum, e.ProgressPercentage)
        Me.ProgressBar1.Value = System.Math.Max(Me.ProgressBar1.Minimum, bounded)
    End Sub

    ''' <summary>
    ''' Runs on the UI thread when the worker finishes: binds the result (or shows
    ''' the failure), resets the progress bar and re-enables the buttons.
    ''' </summary>
    Private Sub BackgroundWorker1_RunWorkerCompleted(ByVal sender As System.Object, ByVal e As System.ComponentModel.RunWorkerCompletedEventArgs) Handles BackgroundWorker1.RunWorkerCompleted
        Me.ProgressBar1.Value = 0
        Me.btn采集.Enabled = True
        Me.btn确定.Enabled = True

        ' e.Result throws when the worker failed, so check e.Error first.
        If e.Error IsNot Nothing Then
            ShowMessageBox(e.Error.Message)
            Return
        End If

        Me.dg沪江慢速VOA.DataSource = DirectCast(e.Result, DataTable)
    End Sub

    ''' <summary>
    ''' Opens the row's URL with the shell when the 打开 column is clicked.
    ''' </summary>
    Private Sub dg沪江慢速VOA_CellContentClick(ByVal sender As System.Object, ByVal e As System.Windows.Forms.DataGridViewCellEventArgs) Handles dg沪江慢速VOA.CellContentClick
        ' Header clicks arrive with a negative row index.
        If e.RowIndex < 0 Then
            Exit Sub
        End If

        If e.ColumnIndex <> Me.dg沪江慢速VOA.Columns("打开").Index Then
            Exit Sub
        End If

        Dim target As String = TryCast(Me.dg沪江慢速VOA.Rows(e.RowIndex).Cells("网址").Value, String)
        ' Process.Start fails without a file name; skip rows that have no URL.
        If String.IsNullOrEmpty(target) Then
            Exit Sub
        End If

        Using pro As New Process
            pro.StartInfo.FileName = target
            pro.Start()
        End Using
    End Sub

    ''' <summary>
    ''' Keeps the grid on its designer-defined columns instead of generating them from the data source.
    ''' </summary>
    Private Sub hujiangEnglish_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        Me.dg沪江慢速VOA.AutoGenerateColumns = False
    End Sub
End Class
